library(tidyverse)
library(magrittr)
tasa_desocupacion <- readRDS("Data/tasa_desocupacion_canton.rds") %>%
rename(canton = mpio)
encuesta <- readRDS("Data/encuestaECU20N.rds") %>%
transmute(
upm,
provincia = substr(upm, 1, 2),
canton = substr(upm, 1, 4),
ingreso = ingcorte,
pobreza = ifelse(ingreso<lp,1,0),
estrato = paste0(provincia, areageo2),
fep = `_fep`
) Fay Herriot en R y STAN
CEPAL - División de Estadísticas Sociales
Estimación de Fay Herriot normal.
El estimador directo no es el único insumo del modelo de áreas de Fay-Herriot; también lo es su varianza. El estimador puntual da un indicio de la localización del parámetro, y su varianza presenta el nivel de certeza o confianza sobre esta localización.
Al tratar con cifras provenientes de procesamientos con encuestas de hogares, es indispensable siempre tener en cuenta que el sustento inferencial recae en la estrategia de muestreo, definida como la dupla compuesta por el diseño de muestreo y el estimador escogido.
Datos de la encuesta
Definir el diseño muestral
library(survey)
library(srvyr)
diseno <-
as_survey_design(
ids = upm,
weights = fep,
strata = estrato,
nest = TRUE,
.data = encuesta
)Estimación directa
\[ \hat{\bar{Y}}_d = \frac{1}{\hat{N}_d} \sum_{s_d}w_{k}y_{k} \]
Estimacion_dir <- diseno %>% group_by(canton) %>%
summarise(nd = unweighted(n()),
thetahat = survey_mean(pobreza, vartype = c("se"), deff = TRUE)) %>%
full_join(tasa_desocupacion)
Estimacion_dir %<>%
mutate(
thetahat_se = ifelse(thetahat_se < 0.00001, 0.00001, thetahat_se),
thetahat_deff = ifelse(
thetahat_deff < 0.00001 | is.nan(thetahat_deff),
1,
thetahat_deff
),
provincia = substr(canton, 1, 2),
) %>%
select(provincia, canton:urban.coverfraction) %>%
arrange(thetahat_se)
head(Estimacion_dir)| provincia | canton | nd | thetahat | thetahat_se | thetahat_deff | tasa_desocupacion | stable_lights | crops.coverfraction | urban.coverfraction |
|---|---|---|---|---|---|---|---|---|---|
| 01 | 0107 | 16 | 0.1250000 | 1e-05 | 1 | 0.0191304 | 604.6471 | 1838.325 | 90.62353 |
| 01 | 0109 | 19 | 0.0000000 | 1e-05 | 1 | 0.0188440 | 3074.7922 | 10789.788 | 191.94902 |
| 01 | 0110 | 19 | 0.3684211 | 1e-05 | 1 | 0.0259572 | 979.8588 | 2701.945 | 153.76863 |
| 01 | 0111 | 19 | 0.0000000 | 1e-05 | 1 | 0.0170910 | 789.4824 | 1539.753 | 101.63529 |
| 01 | 0112 | 18 | 0.0000000 | 1e-05 | 1 | 0.0196813 | 577.0824 | 2070.541 | 38.30980 |
| 02 | 0202 | 19 | 0.0000000 | 1e-05 | 1 | 0.0338238 | 894.0784 | 5522.784 | 427.90196 |
Gráfico de la desviación estándar.
Dividiendo los datos en observados y NO observados.
data_dir <- Estimacion_dir %>%
filter(!is.na(thetahat) ,!is.na(tasa_desocupacion),
thetahat > 0)
data_syn <-
Estimacion_dir %>% anti_join(data_dir %>% select(canton)) %>%
filter(!is.na(tasa_desocupacion))
cor(data_dir$thetahat, data_dir$tasa_desocupacion)[1] 0.02709861
cor(data_dir$thetahat, log(data_dir$stable_lights))[1] -0.2633113
cor(data_dir$thetahat, log(data_dir$crops.coverfraction))[1] -0.1957303
cor(data_dir$thetahat, log(data_dir$urban.coverfraction))[1] -0.2226019
Modelo bayesiano
\[ Y \mid \mu,\sigma_e \sim N(\mu, \sigma_e)\\ \mu = \boldsymbol{X\beta} + V \] donde \(V \sim N(0 , \sigma_v)\).
Las distribuciones previas para \(\boldsymbol{\beta}\) y \(\sigma^2_v\)
\[ \beta_k \sim N(\mu, \tau^2)\\ \sigma^2_v \sim Inversa-Gamma(\alpha_1,\alpha_2) \]
Creando código de STAN
data {
int<lower=0> N1; // number of data items
int<lower=0> N2; // number of data items for prediction
int<lower=0> p; // number of predictors
matrix[N1, p] X; // predictor matrix
matrix[N2, p] Xs; // predictor matrix
vector[N1] y; // predictor matrix
vector[N1] sigma_e; // known variances
}
// The parameters accepted by the model. Our model
// accepts two parameters 'mu' and 'sigma'.
parameters {
vector[p] beta; // coefficients for predictors
real<lower=0> sigma2_v;
vector[N1] v;
}
transformed parameters{
vector[N1] theta;
real<lower=0> sigma_v;
theta = X * beta + v;
sigma_v = sqrt(sigma2_v);
}
model {
// likelihood
y ~ normal(theta, sigma_e);
// priors
beta ~ normal(0, 100);
v ~ normal(0, sigma_v);
sigma2_v ~ inv_gamma(0.0001, 0.0001);
}
generated quantities{
vector[N2] y_pred;
for(j in 1:N2) {
y_pred[j] = normal_rng(Xs[j] * beta, sigma_v);
}
}Preparando el código de STAN
library(cmdstanr)
fit_FH_Nornal <- cmdstan_model("Data/modelosStan/FH_normal.stan")Organizando datos covariables para STAN
Xdat <-
model.matrix(
thetahat ~ provincia + tasa_desocupacion,
data = data_dir
)
Xs <-
model.matrix(
canton ~ provincia + tasa_desocupacion ,
data = data_syn
)
temp <- setdiff(colnames(Xdat),colnames(Xs))
temp <- matrix(
0,
nrow = nrow(Xs),
ncol = length(temp),
dimnames = list(1:nrow(Xs), temp)
)
Xs <- cbind(Xs,temp)[,colnames(Xdat)]Preparando los datos para STAN
sample_data <- list(
N1 = nrow(data_dir), # Observados.
N2 = nrow(data_syn), # NO Observados.
p = ncol(Xdat), # Número de regresores.
X = as.matrix(Xdat), # Covariables Observados.
Xs = as.matrix(Xs), # Covariables NO Observados
y = as.numeric(data_dir$thetahat), # Estimación directa.
sigma_e = as.numeric(data_dir$thetahat_se) # Error de estimación
)Para ejecutar STAN en R tenemos la librería cmdstanr
model_FH_Nornal <-
fit_FH_Nornal$sample(
data = sample_data,
chains = 4,
parallel_chains = 4,
seed = 1234,
refresh = 1000
)Running MCMC with 4 parallel chains...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 finished in 13.4 seconds.
Chain 1 finished in 13.5 seconds.
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 finished in 13.6 seconds.
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 13.8 seconds.
All 4 chains finished successfully.
Mean chain execution time: 13.6 seconds.
Total execution time: 14.0 seconds.
Comparando resultados.
library(posterior)
library(bayesplot)
library(patchwork)
N1 <- nrow(data_dir)
# Predicción de la estimación (Observados)
y_pred_B <- model_FH_Nornal$draws(variables = "theta", format = "matrix")
rowsrandom <- sample(nrow(y_pred_B), 100)
y_pred2 <- y_pred_B[rowsrandom, 1:N1]
# Comparando predicción con las cadenas
ppc_dens_overlay(y = as.numeric(data_dir$thetahat), y_pred2) Adicionando más covariables modelo.
Xdat <-
model.matrix(
thetahat ~ provincia + tasa_desocupacion +
log(stable_lights/100000) +
log(crops.coverfraction/100000) +
log(urban.coverfraction/100000),
data = data_dir
)
Xs <-
model.matrix(
canton ~ provincia + tasa_desocupacion +
log(stable_lights/100000) +
log(crops.coverfraction/100000) +
log(urban.coverfraction/100000) ,
data = data_syn
)
temp <- setdiff(colnames(Xdat),colnames(Xs))
temp <- matrix(
0,
nrow = nrow(Xs),
ncol = length(temp),
dimnames = list(1:nrow(Xs), temp)
)
Xs <- cbind(Xs,temp)[,colnames(Xdat)]Preparando los datos para STAN
sample_data <- list(
N1 = nrow(data_dir), # Observados.
N2 = nrow(data_syn), # NO Observados.
p = ncol(Xdat), # Número de regresores.
X = as.matrix(Xdat), # Covariables Observados.
Xs = as.matrix(Xs), # Covariables NO Observados
y = as.numeric(data_dir$thetahat), # Estimación directa.
sigma_e = as.numeric(data_dir$thetahat_se) # Error de estimación
)Para ejecutar STAN en R tenemos la librería cmdstanr
model_FH_Nornal <-
fit_FH_Nornal$sample(
data = sample_data,
chains = 4,
parallel_chains = 4,
seed = 1234,
refresh = 1000
)Running MCMC with 4 parallel chains...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 finished in 11.1 seconds.
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 12.0 seconds.
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 finished in 12.5 seconds.
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 finished in 12.9 seconds.
All 4 chains finished successfully.
Mean chain execution time: 12.1 seconds.
Total execution time: 13.0 seconds.
Comparando resultados.
# Predicción de la estimación (Observados)
y_pred_B <- model_FH_Nornal$draws(variables = "theta", format = "matrix")
rowsrandom <- sample(nrow(y_pred_B), 100)
y_pred2 <- y_pred_B[rowsrandom, 1:N1]
# Comparando predicción con las cadenas
ppc_dens_overlay(y = as.numeric(data_dir$thetahat), y_pred2) Comparando estimación directa y predicción de FH
theta_FH <- model_FH_Nornal$summary(variables = "theta")
plot(theta_FH$mean, data_dir$thetahat)
abline(b=1,a=0, col = "red")Estimación de Fay Herriot arcsin.
En su concepción más básica, el modelo de FH es una combinación lineal de covariables. Sin embargo, el resultado de esta combinación pueden tomar valores que se salen del rango aceptable en el que puede estar una proporción; es decir, en general el estimador de Fay-Herriot \(\theta \in R\), mientras que el estimador directo \(\theta \in (0,1)\).
Transformación arcoseno
\[ \hat{z}_d = arcsin\left( \sqrt{ \hat{\theta}_d} \right) \] donde \[ Var\left( \hat{z}_d \right) = \frac{\widehat{DEFF}_d}{4\times n_d} = \frac{1}{4\times n_{d,efectivo} } \]
Estimación directa
data_dir %<>% mutate(
n_effec = nd/thetahat_deff,
varhat = 1/(4*n_effec),
T_thetahat = asin(sqrt(thetahat))
)Modelo bayesiano
\[ Z \mid \mu,\sigma_e \sim N(\mu, \sigma_e)\\ \mu = \boldsymbol{X\beta} + V \\ \theta = \left(sin(\mu)\right)^2 \] donde \(V \sim N(0 , \sigma_v)\).
Las distribuciones previas para \(\boldsymbol{\beta}\) y \(\sigma^2_v\)
\[ \beta_k \sim N(\mu, \tau^2)\\ \sigma^2_v \sim Inversa-Gamma(\alpha_1,\alpha_2) \]
Creando código de STAN
data {
int<lower=0> N1; // number of data items
int<lower=0> N2; // number of data items for prediction
int<lower=0> p; // number of predictors
matrix[N1, p] X; // predictor matrix
matrix[N2, p] Xs; // predictor matrix
vector[N1] y; // predictor matrix
vector[N1] sigma_e; // known variances
}
// The parameters accepted by the model. Our model
// accepts two parameters 'mu' and 'sigma'.
parameters {
vector[p] beta; // coefficients for predictors
real<lower=0> sigma2_v;
vector[N1] v;
}
transformed parameters{
vector[N1] theta;
vector[N1] lp;
real<lower=0> sigma_v;
lp = X * beta + v;
sigma_v = sqrt(sigma2_v);
for(k in 1:N1){
theta[k] = pow(sin(lp[k]), 2);
}
}
model {
// likelihood
y ~ normal(lp, sigma_e);
// priors
beta ~ normal(0, 100);
v ~ normal(0, sigma_v);
sigma2_v ~ inv_gamma(0.0001, 0.0001);
}
generated quantities{
vector[N2] theta_pred;
vector[N2] lppred;
for(j in 1:N2) {
lppred[j] = normal_rng(Xs[j] * beta, sigma_v);
theta_pred[j] = pow(sin(lppred[j]), 2);
}
}Preparando el código de STAN
fit_FH_arcsin_Nornal <- cmdstan_model("Data/modelosStan/FH_arcsin_normal.stan")sample_data <- list(N1 = nrow(data_dir),
N2 = nrow(data_syn),
p = ncol(Xdat),
X = as.matrix(Xdat),
Xs = as.matrix(Xs),
y = as.numeric(data_dir$T_thetahat),
sigma_e = sqrt(data_dir$varhat)
)Para ejecutar STAN en R tenemos la librería cmdstanr
model_FH_arcsin_Nornal <-
fit_FH_arcsin_Nornal$sample(
data = sample_data,
iter_warmup = 1000,
iter_sampling = 1000,
chains = 4,
parallel_chains = 4,
seed = 1234,
refresh = 1000
)Running MCMC with 4 parallel chains...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 finished in 11.6 seconds.
Chain 3 finished in 11.6 seconds.
Chain 4 finished in 11.6 seconds.
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 finished in 12.1 seconds.
All 4 chains finished successfully.
Mean chain execution time: 11.7 seconds.
Total execution time: 12.2 seconds.
Comparando resultados.
y_pred_B <- model_FH_arcsin_Nornal$draws(variables = "theta", format = "matrix")
rowsrandom <- sample(nrow(y_pred_B), 100)
y_pred2 <- y_pred_B[rowsrandom, 1:N1]
ppc_dens_overlay(y = as.numeric(data_dir$thetahat), y_pred2) +
xlim(-0.2,1.2)library(ggplot2)
theta_FH <- model_FH_arcsin_Nornal$summary(variables = "theta")
data_dir %<>% mutate(pred_arcsin = theta_FH$mean)
ggplot(data = data_dir, aes(x = pred_arcsin, y = thetahat)) +
geom_abline(
slope = 1,
intercept = 0,
color = "red",
size = 2
) +
geom_point() + theme_bw(base_size = 20)Predicción en los dominios NO observados
theta_FH_pred <-
model_FH_arcsin_Nornal$summary(variables = "theta_pred")
data_syn %<>% mutate(pred_arcsin = theta_FH_pred$mean)
ggplot(data = data_syn, aes(x = pred_arcsin)) +
geom_density(size = 1.5, color = "red") +
xlim(-0.2, 1.2) +
theme_bw(base_size = 20)Estimación de los parámetros
model_FH_arcsin_Nornal$summary(
variables = c("sigma2_v"))| variable | mean | median | sd | mad | q5 | q95 | rhat | ess_bulk | ess_tail |
|---|---|---|---|---|---|---|---|---|---|
| sigma2_v | 0.0404187 | 0.0399483 | 0.0061822 | 0.0060506 | 0.0311431 | 0.0514301 | 1.000621 | 2530.177 | 2570.348 |
library(posterior)
library(bayesplot)
(mcmc_dens_chains(model_FH_arcsin_Nornal$draws("sigma2_v")) +
mcmc_areas(model_FH_arcsin_Nornal$draws("sigma2_v")))/
mcmc_trace(model_FH_arcsin_Nornal$draws("sigma2_v"))Estimación de Fay Herriot beta
Modelo bayesiano
\[ Y \mid a,b \sim beta(a, b)\\ a = \theta * \phi\\ b = (1 - \theta) * \phi\\ \] donde
\[ \theta = \frac{\exp{\left(\mu\right)}}{ 1+ \exp{\left(\mu\right)}}\\ \mu = \boldsymbol{X\beta} + V \] con \(V \sim N(0 , \sigma_v)\) y $= $
Las distribuciones previas para \(\boldsymbol{\beta}\) y \(\sigma^2_v\)
\[ \beta_k \sim N(\mu, \tau^2)\\ \sigma^2_v \sim Inversa-Gamma(\alpha_1,\alpha_2) \]
Creando código de STAN
data {
int<lower=1> N1; // sample size
int<lower=1> N2; // sample size
int<lower=1> p; // p predictors
vector<lower=0,upper=1>[N1] y; // response
matrix[N1,p] X;
matrix[N2,p] Xs;
vector<lower=0>[N1] phi; // dispersion parameter
}
parameters {
vector[p] beta;
real<lower=0> sigma2_v; // K predictors
vector[N1] v;
// reg coefficients
}
transformed parameters{
vector[N1] LP;
real<lower=0> sigma_v;
vector[N1] theta; // linear predictor
LP = X * beta + v;
sigma_v = sqrt(sigma2_v);
for (i in 1:N1) {
theta[i] = inv_logit(LP[i]);
}
}
model {
// model calculations
vector[N1] a; // parameter for beta distn
vector[N1] b; // parameter for beta distn
for (i in 1:N1) {
a[i] = theta[i] * phi[i];
b[i] = (1 - theta[i]) * phi[i];
}
// priors
beta ~ normal(0, 100);
v ~ normal(0, sigma_v);
sigma2_v ~ inv_gamma(0.0001, 0.0001);
// likelihood
y ~ beta(a, b);
}
generated quantities {
vector[N2] y_pred;
vector[N2] thetapred;
for (i in 1:N2) {
y_pred[i] = normal_rng(Xs[i] * beta, sigma_v);
thetapred[i] = inv_logit(y_pred[i]);
}
}Preparando el código de STAN
fit_FH_beta <- cmdstan_model("Data/modelosStan/FH_beta.stan")sample_data <- list(N1 = nrow(data_dir),
N2 = nrow(data_syn),
p = ncol(Xdat),
X = as.matrix(Xdat),
Xs = as.matrix(Xs),
y = as.numeric(data_dir$thetahat),
phi = data_dir$n_effec - 1
)Para ejecutar STAN en R tenemos la librería cmdstanr
model_FH_beta <-
fit_FH_beta$sample(
data = sample_data,
iter_warmup = 1000,
iter_sampling = 1000,
chains = 4,
parallel_chains = 4,
seed = 1234,
refresh = 1000
)Running MCMC with 4 parallel chains...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 finished in 25.2 seconds.
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 25.5 seconds.
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 finished in 25.6 seconds.
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 finished in 26.6 seconds.
All 4 chains finished successfully.
Mean chain execution time: 25.7 seconds.
Total execution time: 26.8 seconds.
Comparando resultados.
N1 <- nrow(data_dir)
y_pred_B <- model_FH_beta$draws(variables = "theta", format = "matrix")
rowsrandom <- sample(nrow(y_pred_B), 100)
y_pred2 <- y_pred_B[rowsrandom, 1:N1]
ppc_dens_overlay(y = as.numeric(data_dir$thetahat), y_pred2) +
xlim(-0.2,1.2)theta_FH <- model_FH_beta$summary(variables = "theta")
data_dir %<>% mutate(pred_beta = theta_FH$mean)
ggplot(data = data_dir, aes(x = pred_beta, y = thetahat)) +
geom_abline(
slope = 1,
intercept = 0,
color = "red",
size = 2
) +
geom_point() + theme_bw(base_size = 20)Comparando las predicciones de los modelos arcsin y beta
theta_FH_pred <-
model_FH_beta$summary(variables = "thetapred")
data_syn %<>% mutate(pred_beta = theta_FH_pred$mean)
ggplot(data = data_syn, aes(x = pred_beta)) +
geom_density(size = 1.5, color = "red") +
xlim(-0.2, 1.2) +
theme_bw(base_size = 20)ggplot(data = data_syn, aes(x = pred_beta, y = pred_arcsin)) +
geom_abline(
slope = 1,
intercept = 0,
color = "red",
size = 2
) +
geom_point() + theme_bw(base_size = 20)Estimación de los parámetros
model_FH_beta$summary(
variables = c("sigma2_v"))| variable | mean | median | sd | mad | q5 | q95 | rhat | ess_bulk | ess_tail |
|---|---|---|---|---|---|---|---|---|---|
| sigma2_v | 0.8872915 | 0.8728095 | 0.1457223 | 0.1426662 | 0.6775966 | 1.149355 | 1.000657 | 2234.071 | 3109.609 |
(mcmc_dens_chains(model_FH_beta$draws("sigma2_v")) +
mcmc_areas(model_FH_beta$draws("sigma2_v")))/
mcmc_trace(model_FH_beta$draws("sigma2_v"))Creando el mapa con los resultados.
library(sp)
library(sf)
library(tmap)
data_map <- rbind(
data_dir %>% select(canton,pred_beta),
data_syn %>% select(canton,pred_beta))
## Leer Shape del pais
ShapeSAE <- read_sf("ShapeECU/ecuador_Cantones.shp")
ShapeSAE %<>% mutate(canton = DPA_CANTON)
mapa <- tm_shape(ShapeSAE %>%
left_join(data_map, by = "canton"))
brks_lp <- c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 1)
tmap_options(check.and.fix = TRUE)
Mapa_lp <-
mapa + tm_polygons(
"pred_beta",
breaks = brks_lp,
title = "Mapa de pobreza",
palette = "-YlOrRd"
) + tm_layout(asp = 0)
Mapa_lp